#!/usr/bin/python3
# coding=utf-8
import openai

# Host and port that are combined into the client's base URL below.
xinference_url = "10.26.33.159"
xinference_port = 9997

# Model identifier used as the default ``model_uid`` of ``chat_model``.
model_entities_uid = "tt_Meta-Llama-3.1-8B-Instruct"

# Shared client pointed at the ``/v1`` endpoint of the host/port above.
# The api_key is a placeholder string; as its text says, it must not be empty.
client = openai.Client(
    base_url=f"http://{xinference_url}:{xinference_port}/v1",
    api_key="cannot be empty",
)


def chat_model(prompt, model_uid=model_entities_uid, max_tokens=512, temperature=0.7):
    """Send *prompt* as a single user message and return the reply text.

    Args:
        prompt: Text placed in the ``content`` field of the one user message.
        model_uid: Identifier passed as ``model``; defaults to the
            module-level ``model_entities_uid``.
        max_tokens: Forwarded unchanged as ``max_tokens``.
        temperature: Forwarded unchanged as ``temperature``.

    Returns:
        The ``message.content`` of the first entry in the response's
        ``choices``.
    """
    user_turn = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model=model_uid,
        messages=[user_turn],
        max_tokens=max_tokens,
        temperature=temperature,
    )

    # Only the first choice is read; any further choices are ignored.
    first_choice = completion.choices[0]
    return first_choice.message.content



if __name__ == '__main__':
    # Smoke test: ask the same question ten times and print each reply.
    for _ in range(10):
        reply = chat_model("who are you")
        print(reply)